# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import hashlib
import logging

import scrapy
from scrapy.pipelines.images import ImagesPipeline
from w3lib.util import to_bytes

from scrapy_zhaohaofang.spiders.utils.zhaohaofang import ZhfBaseDBPipeline


class ScrapyZhaohaofangDBFirstPipeline(ZhfBaseDBPipeline):

    # 页面数据存储逻辑:
    # 1: 将图片存储到本地;
    # 2: 匹配物业,小区等信息;
    # 3: 将页面信息 和 上面的物业小区信息 综合起来 写入 数据库 t_hvs表 并返回 此次hvs_id
    # 4: 将hvs_id 作为 pic表的pid 用于保存图片表
    # 5: 将hvs_id 作为 rnt表的hid 用作保存租房信息

    # todo: 1:实现将两张基础表 先缓存起来 ,  这就涉及到 python 使用缓存,后期再实现吧
    # TODO: 2: 实现多线程, 一个线程 去实现比对基础表的信息, 另外一个线程 去实现将图片从网上下载到本地并返回路径

    # 析构函数, 用于销毁对象
    def __del__(self):
        pass

    def process_item(self, item, spider):
        query = self.dbpool_pro.runInteraction(self.process, item)
        query.addErrback(self.handle_error)
        return item

    def process(self, cursor, item):
        self.match_qrt(cursor, item)
        self.match_prt(cursor, item)
        self.insert_into_hvs_table(cursor, item)
        self.insert_into_rnt_table(cursor, item)

    # 匹配小区情况
    # 两种方法:
    # 1: 使用 insert ignore  + select ; 这样 每一次 都是 两步
    # insert_ignore_qrt_sql = 'insert ignore into qrt (name, addr) values({qrt_name},{qrt_addr})'.format(
    #     qrt_name=qrt_name, qrt_addr=qrt_addr
    # )
    # logging.info("通过小区名字匹配小区情况的sql是" % insert_ignore_qrt_sql)
    # 2: 先 select 再 insert 再 select ; 这样 出现 三步 或 一步 ,
    # 但考虑到 随着数据数据的增多 往往 一步就可以办到 , 所以 这里 采用方案二
    @staticmethod
    def match_qrt(cursor, item):
        qrt_name = item["qrt_name"]
        qrt_addr = item["qrt_addr"]
        select_qrt_by_name_sql = "select id as qrt_id, name as qrt_name, dsct as qrt_dsct, dnct as qrt_dnct, " \
                                 "addr as qrt_addr from t_qrt where instr(name, '{qrt_name}') > 0 ".format(
            qrt_name=qrt_name)
        logging.info("通过小区名字匹配小区信息的sql是: %s" % select_qrt_by_name_sql)
        cursor.execute(select_qrt_by_name_sql)
        qrt_db_dict = cursor.fetchone()
        if not qrt_db_dict:
            insert_qrt_sql = "insert into t_qrt (name, addr) values('{qrt_name}','{qrt_addr}')".format(
                qrt_name=qrt_name, qrt_addr=qrt_addr
            )
            logging.info("插入 小区表 qrt_table 的 sql是: %s" % insert_qrt_sql)
            cursor.execute(insert_qrt_sql)
            cursor.execute(select_qrt_by_name_sql)
            qrt_db_dict = cursor.fetchone()
            if not qrt_db_dict:
                logging.info("插入 小区表 qrt_table 失败", item)
        if not qrt_db_dict:
            pass
        else:
            item["qrt_id"] = qrt_db_dict["qrt_id"]
            item["qrt_name"] = qrt_db_dict["qrt_name"]
            item["qrt_dsct"] = "" if not qrt_db_dict["qrt_dsct"] else qrt_db_dict["qrt_dsct"]
            item["qrt_dnct"] = "" if not qrt_db_dict["qrt_dnct"] else qrt_db_dict["qrt_dnct"]
            item["qrt_addr"] = qrt_db_dict["qrt_addr"]
            return item

    # 匹配物业情况
    @staticmethod
    def match_prt(cursor, item):
        prt_name = item["prt_name"]
        select_prt_by_name_sql = "select id as prt_id, name as prt_name " \
                                 "from t_prt where instr(name, '{prt_name}') > 0 ".format(prt_name=prt_name)
        logging.info("通过物业公司名字匹配物业信息的sql是: %s" % select_prt_by_name_sql)
        cursor.execute(select_prt_by_name_sql)
        prt_db_dict = cursor.fetchone()
        if not prt_db_dict:
            insert_prt_sql = "insert into t_prt (name) values('{prt_name}')".format(prt_name=prt_name)
            logging.info("插入 物业表 prt_table 的 sql是: %s" % insert_prt_sql)
            cursor.execute(insert_prt_sql)
            cursor.execute(select_prt_by_name_sql)
            prt_db_dict = cursor.fetchone()
            if not prt_db_dict:
                logging.info("插入 物业表 prt_table 失败", item)
        if not prt_db_dict:
            pass
        else:
            item["prt_id"] = prt_db_dict["prt_id"]
            item["prt_name"] = prt_db_dict["prt_name"]
            return item

    # 插入hvs表
    @staticmethod
    def insert_into_hvs_table(cursor, item):
        insert_hvs_sql = \
            "insert into t_hvs (pid, qid, bid, name, dsct, dnct, addr, cbd, rom, hal, " \
            "tlt, flr, ornt, flrs, kwd, cntr, cntrt, cntrm, ctmb, ctmc, ctmd) values" \
            "('{pid}', '{qid}', '{bid}', '{name}', '{dsct}', '{dnct}', '{addr}', '{cbd}', " \
            "'{room}', '{hal}', '{tlt}','{flr}','{ornt}'," \
            "'{flrs}', '{kwd}', '{cntr}', '{cntrt}','{cntrm}', curdate(), curdate(), curdate()) ".format(
                pid=item["prt_id"] if 'prt_id' in item and item["prt_id"] else 0,
                qid=item["qrt_id"] if 'qrt_id' in item and item["qrt_id"] else 0,
                bid=item["bid"],
                name=item["name"],
                dsct=item["qrt_dsct"] if 'qrt_dsct' in item and item["qrt_dsct"] else "",
                dnct=item["qrt_dnct"] if 'qrt_dnct' in item and item["qrt_dnct"] else "",
                addr=item["qrt_addr"],
                cbd=item["cbd"],
                room=item["room"],
                hal=item["hal"],
                tlt=item["tlt"],
                flr=item["flr"],
                ornt=item["ornt"],
                flrs=item["flrs"],
                kwd=item["kwd"],
                cntr=item["cntr"],
                cntrt=item["cntrt"],
                cntrm=item["cntrm"]
            )
        logging.info("插入hvs表的sql是: %s" % insert_hvs_sql)
        cursor.execute(insert_hvs_sql)
        item["pic_pid"] = cursor.lastrowid
        return item

    # 插入 rnt表
    @staticmethod
    def insert_into_rnt_table(cursor, item):

        insert_rnt_sql = "insert into t_rnt (hid, mny, dpst, pay, rom, gdr, area, ctm, dsct, ttl, cntr, tel, mbl) values " \
                         "('{hid}', '{mny}', '{dpst}', '{pay}', '{rom}', '{gdr}', '{area}', " \
                         "'{ctm}', '{dsct}', '{ttl}', '{cntr}', '{tel}', '{mbl}' )".format(
            hid=item["pic_pid"],
            mny=item["mny"],
            dpst=item["dpst"],
            pay=item["pay"],
            rom=item["rom"],
            gdr=item["gdr"],
            area=item["area"],
            ctm=item["ctm"],
            dsct=item["qrt_dsct"],
            ttl=item["ttl"],
            cntr=item["cntr"],
            tel=item["cntrm"],
            mbl=item["cntrt"],
        )
        logging.info("插入rnt表的sql是: %s" % insert_rnt_sql)
        cursor.execute(insert_rnt_sql)


class ScrapyZhaohaofangDBAfterPicPipeline(ZhfBaseDBPipeline):
    def process_item(self, item, spider):
        query = self.dbpool_pro.runInteraction(self.process, item)
        query.addErrback(self.handle_error)
        return item

    def process(self, cursor, item):
        self.insert_into_pic_table(cursor, item)

    # 插入pic表
    @staticmethod
    def insert_into_pic_table(cursor, item):
        pid = item["pic_pid"]
        if 'pic_paths' in item:
            pic_paths = item["pic_paths"]
            if pic_paths:
                for pic_path in pic_paths:
                    insert_pic_sql = "insert into t_pic (pid, pth, knd) values ('{pid}', '{path}', " \
                                     "'5')".format(pid=pid, path=pic_path)
                    cursor.execute(insert_pic_sql)
                    logging.info("插入pic表的sql是: %s" % insert_pic_sql)
            else:
                logging.info("没有执行pic插入表的操作, pic_paths 的值为: %s ." % pic_paths)
        else:
            logging.info("没有执行pic插入表的操作, 因为 pic_paths 不在 item中")


class ScrapyZhaohaofangPicPipeline(ImagesPipeline):
    def get_media_requests(self, item, info):
        # 发起壁纸下载请求
        for image_url in item['pic_urls']:
            yield scrapy.Request(
                image_url,
                meta={'item': item}
            )

    def file_path(self, request, response=None, info=None):
        item = request.meta['item']
        image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
        path = "/pic/hvsPic/%s/%s.jpg" % (item['pic_pid'], image_guid)
        return path

    def item_completed(self, results, item, info):
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            logging.info("没有图片url")
        else:
            item['pic_paths'] = image_paths
        return item
